---
title: "Replication Code for 'Measuring Democratic Backsliding'"
author: "Kamya Yadav and Andrew Little"
date: 7/18/2023
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

#load libraries 
library(tidyverse)
library(stargazer)
# library(kableExtra)
library(haven)
library(foreign)
library(dplyr)

rm(list=ls())
#set working directory 
setwd("~/Dropbox/backsliding_replication/final_data")

# Directory for image output on Andrew's computer
outdir = "~/Dropbox/Apps/Overleaf/media coverage of backsliding (1)/_img"

# Default years for graphs
# For now stopping at 2021 even though Vdem goes to 2022
yrs = 1980:2021



```

```{r data}


#load data 

## freedom house data 
fh = read.csv("fh.csv")

## vdem data 
# vdem = readRDS("v_dem.rds")
vdem = readRDS("v_dem13.rds")

## executive constraints 
exec_cons = read_dta("Meng_exec_constraints_updated.dta")

## NELDA 
nelda = read.csv("NELDA.csv", header = T)

## Boix, Miller, Rosato democracy 
load("bmr_democracy.RData")
bmr = table

## Database of pol institutions
dpi = read.csv("DPI2020.csv")

## CPJ journalist data 
cpj_killed = read.csv("cpj_journalists_killed.csv")

cpj_imprison = read.csv("cpj_journalists_imprisoned.csv")

## polity 5 dataset
polity = read.csv("polity5.csv")
```

```{r figure 1 comparing vdem, fh, polity}

# Changing to output directory
setwd(outdir)

# Polity data stops after 2018 (except US)
pol_yrs = 1980:2018

#Freedom house is missing 1981 due to weird quirk
fh_yrs = yrs[-2]

# Figure 1
fh_year = tapply(fh$fh_total_reversed[fh$year %in% yrs], 
                 fh$year[fh$year %in% yrs], mean, na.rm=TRUE)
pol_year = tapply(polity$polity2[polity$year %in% pol_yrs], 
                  polity$year[polity$year %in% pol_yrs], mean, na.rm=TRUE)
vdem_year = tapply(vdem$v2x_polyarchy[vdem$year %in% yrs], vdem$year[vdem$year %in% yrs], mean, na.rm=TRUE)

pdf("averages.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2,.7,0))
plot(yrs, vdem_year, type="l", ylim=c(0,1), xlim=c(1980, 2021),
     xlab="Year", ylab="Average Polyarchy Score", lwd=2)
abline(h=vdem_year[length(vdem_year)], lty=3)
text(1990, .54, "2021 Average", cex=1)

plot(fh_yrs, fh_year/12, type="l", ylim=c(0,1), xlim=c(1980, 2021),
     xlab="Year", ylab="Average Normalized FH Score", lwd=2)
abline(h=fh_year[length(fh_year)]/12, lty=3)
text(1990, .62, "2021 Average", cex=1)

plot(pol_yrs, (pol_year+10)/20, type="l", ylim=c(0,1), xlim=c(1980, 2021), lwd=2,
     xlab="Years", ylab="Average Normalized Polity")
lastpol = pol_year[length(pol_year)]
abline(h=(pol_year[names(pol_year)=="2018"]+10)/20, lty=3)
text(1990, .73, "2018 Average", cex=1)
dev.off()

```

```{r fig2}
setwd(outdir)

# Function to plot average of variable by year
plotavgyr = function(variable=vdem$v2x_polyarchy, 
                      year=vdem$year,
                      minyear=2000, maxyear=2021,
                      xlab="Year", ylab="Average", main="", ylim=NULL, add=FALSE,
                      lwd=1, col="black"){
  avg_yr = tapply(variable, year, mean, na.rm=TRUE)
  minindex = which(as.numeric(names(avg_yr))==minyear)
  maxindex = which(as.numeric(names(avg_yr))==maxyear)
  avg_yr = avg_yr[minindex:maxindex]
  if (add) lines(names(avg_yr), avg_yr, col=col, lwd=lwd)
  if (!add)  plot(names(avg_yr), avg_yr, type="l", xlab=xlab, ylab=ylab, 
                  main=main, ylim=ylim,
                  col=col, lwd=lwd)
}

# VDem by subindices (for the appendix)
pdf("subjobj.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotavgyr(var=vdem$v2x_freexp_altinf, ylab="Average Subjective Subindices",
          minyear=1980, ylim=c(.3, .7), lwd=2, col=rgb(.8,0,0))
plotavgyr(var=vdem$v2x_frassoc_thick, add=TRUE, minyear=1980, lwd=2, col=rgb(0,.8,0))
plotavgyr(var=vdem$v2xel_frefair, add=TRUE, minyear=1980, lwd=2, col=rgb(0,0,.8))
text(1990, .65, "Expression", col=rgb(.8,0,0))
text(2005, .61, "Association", col=rgb(0,.8,0))
text(2005, .47, "Elections", col=rgb(0,0,.8))


plotavgyr(var=vdem$v2x_suffr, ylab="Average Objective Subindices",
          minyear=1980, ylim=c(.6, 1), lwd=2)
text(2000, .99, "Suffrage")
text(2008, .78, "Elected Officials", col="grey")
plotavgyr(var=vdem$v2x_elecoff, add=TRUE, minyear=1980, lwd=2, col="grey")
dev.off()

```

```{r nelda_recoding}
nelda$increp = ifelse(nelda$nelda39=="yes", 1, 0)
# Coding not applicable as NA for this one, will drop out for graphs
nelda$increp[nelda$nelda39=="N/A"]=NA
# Only 3 cases where incumbent contested is n/a or unclear so just 
# setting these to NA too
nelda$inccontest = ifelse(nelda$nelda20=="yes",1,0)
table(nelda$increp, nelda$inccontest)

nelda$partylose = ifelse(nelda$nelda24=="yes",1,0)
nelda$partylose[nelda$nelda24 %in% c("N/A", "unclear")] = NA

# alternative coding (not used in paper, trends look similar):
# counting it as a party loss as well if n/a or unclear, BUT the incumbency
# is at stake and the incumbent is replaced
nelda$partylose2 = nelda$partylose
nelda$partylose2[nelda$nelda24 %in% c("N/A", "unclear") & 
                   nelda$inccontest==1&
                   nelda$increp==1] = 1
# If party is n/a or unlear, incumbency is at stake, and no
# replacement coding party loss as 0
nelda$partylose2[nelda$nelda24 %in% c("N/A", "unclear") & 
                   nelda$inccontest==1&
                   nelda$increp==0] = 0

nelda$oppgain = ifelse(nelda$nelda28=="yes",1,0)
nelda$oppgain[nelda$nelda24 %in% c("N/A", "unclear")] = NA

# Not a ton of NAs/unclears for these variables, and the
# "undemocratic" outcome is pretty rare
# Since they are getting pulled into an index let's assume
# they are equal to the "good" value if n/a unclear
nelda$suspended = ifelse(nelda$nelda1=="yes", 1, 0)
nelda$oppallowed = ifelse(nelda$nelda3 %in% c("yes", "N/A", "unclear"), 1, 0)
nelda$multilegal = ifelse(nelda$nelda4 %in% c("yes", "N/A", "unclear"),1,0)
nelda$choice = ifelse(nelda$nelda5 %in% c("yes", "N/A", "unclear"),1,0)

# Lot of NAs for prior term limit evasion
# Coding the NAs as 0 because there wasn't a past evasion (even if not possible)
nelda$pasttl = ifelse(nelda$nelda9=="yes", 1, 0)

# Note the NAs here are typically cases where the opposition is illegal
# Keeping these as zeros as our index adds these, so either banning 
# all opposition or specific individuals sums to 1
nelda$oppprevent = ifelse(nelda$nelda13=="yes", 1, 0)

# Not dropping the NAs where parties are banned
nelda$boycott = ifelse(nelda$nelda14=="yes", 1, 0)


# Index with (1) opposition allowed and not prevented from running,
# (2) multiple legal parties
# (3) choice on the ballot
nelda$multiparty = (nelda$oppallowed*(1-nelda$oppprevent) + 
                       nelda$multilegal + 
                       nelda$choice)/3

# process index
nelda$process = ((1-nelda$suspended) + (1-nelda$pasttl) +
                    (1-nelda$boycott))/3

```

```{r nelda_checks}

# General goal here is to find the most recent NELDA election
# Checking that NELDA is sorted by election id, which first sorts by country, then by date
# So pulling the last entry will always give the latest election in a subset
table(order(nelda$electionid)==(1:nrow(nelda)))

# also dropping purported Saudi election 
# from email communication with Susan Hyde 2/23/2023
nelda = subset(nelda, country!="Saudi Arabia")

# To define the universe of potential country-years, draw this from V-Dem
nelda_cy = subset(vdem, select=c(year, COWcode), year >= 1945)

# For some reason was getting an error here without specifying
# The rename should come from dyplr
nelda_cy <- nelda_cy %>%
  dplyr::rename(ccode = COWcode)

# Most recent election
nelda_cy$lastelec = NA
# Most recent election where incumbency contested
nelda_cy$lastelec.ic = NA
# Most recent election where incumbency not contested
nelda_cy$lastelec.inc = NA
for (i in 1:nrow(nelda_cy)){
  # All elections up to and including current year
  ids = nelda$electionid[nelda$ccode==nelda_cy$ccode[i] & nelda$year <= nelda_cy$year[i]]
  # Pull the last relevant election
  if (length(ids)>0) nelda_cy$lastelec[i] = ids[length(ids)]
  # Same idea for ic and inc elections
  ids.ic = nelda$electionid[nelda$ccode==nelda_cy$ccode[i] & 
                              nelda$year <= nelda_cy$year[i] &
                              nelda$inccontest==1]
  if (length(ids.ic)>0) nelda_cy$lastelec.ic[i] = ids.ic[length(ids.ic)]
  ids.inc = nelda$electionid[nelda$ccode==nelda_cy$ccode[i] & 
                               nelda$year <= nelda_cy$year[i] &
                               nelda$inccontest==0]
  if (length(ids.inc)>0) nelda_cy$lastelec.inc[i] = ids.inc[length(ids.inc)]
}

# Checking how many countries we can pull last election data for by year
# table(nelda_cy$year, is.na(nelda_cy$lastelec))
# table(nelda_cy$year, is.na(nelda_cy$lastelec.ic))
# table(nelda_cy$year, is.na(nelda_cy$lastelec.inc))

# Bits of nelda to bring into the last election data set
nmerge = nelda %>% 
  dplyr::select(electionid, increp, inccontest, partylose, partylose2,
                oppgain,
                suspended, oppallowed, multilegal, 
                choice, pasttl, 
                oppprevent, boycott, multiparty, process)

dim(nelda)
dim(nmerge)
dim(nelda_cy)
# Merging in last election data
# Creating three seperate data sets
nelda_le = left_join(nelda_cy, nmerge, by=c("lastelec"="electionid"))
dim(nelda_le)

nelda_ic = left_join(nelda_cy, nmerge, by=c("lastelec.ic"="electionid"))
dim(nelda_ic)

nelda_inc = left_join(nelda_cy, nmerge, by=c("lastelec.inc"="electionid"))
dim(nelda_inc)

nelda_le$timesince = nelda_le$year - as.numeric(substr(nelda_le$lastelec, 5,8))
nelda_ic$timesince = nelda_ic$year - as.numeric(substr(nelda_ic$lastelec.ic, 5,8))
nelda_inc$timesince = nelda_inc$year - as.numeric(substr(nelda_inc$lastelec.inc, 5,8))

```


```{r main nelda graphs}
# For main graphs only including country-years
# Where the latest election was within the past 6 years
# Alternative verisons show this choice doesn't affect the overall picture
setwd(outdir)
pdf("partylose_lastelec.pdf", family="Times", width=6.5, height=3)
par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotavgyr(nelda_le$partylose[nelda_le$timesince <= 6], 
          nelda_le$year[nelda_le$timesince <= 6], 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980,
          main="All Elections")
plotavgyr(nelda_ic$partylose[nelda_ic$timesince <= 6],  
          nelda_ic$year[nelda_ic$timesince <= 6], 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980,
          main="Incumbency at Stake")
plotavgyr(nelda_inc$partylose[nelda_inc$timesince <= 6],
          nelda_inc$year[nelda_inc$timesince <= 6],
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980,
          main="Incumbency not at Stake")
dev.off()

pdf("opposition_process.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotavgyr(nelda_le$multiparty[nelda_le$timesince <= 6], 
          nelda_le$year[nelda_le$timesince <= 6], 
          ylim=c(0, 1), ylab="Multiparty Index", minyear=1980)
plotavgyr(nelda_le$process[nelda_le$timesince <= 6], 
          nelda_le$year[nelda_le$timesince <= 6], 
          ylim=c(0, 1), ylab="Process Violation Index", minyear=1980)
# plotavgyr(nelda_inc$multiparty[nelda_inc$timesince <= 6],  
#           nelda_inc$year[nelda_inc$timesince <= 6],
#           ylim=c(0, 1), ylab="Incumbent Party Loses", minyear=1980)
dev.off()


```

```{r alt nelda grahs}
# Alternative versions of the main NELDA graphs
# Ended up cutting these for length but maybe of interest

# Plotting for all country-years we have a previous election
# pdf("partylose_lastelec.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotavgyr(nelda_le$partylose, nelda_le$year, 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)
plotavgyr(nelda_ic$partylose,  nelda_ic$year, 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)
plotavgyr(nelda_inc$partylose,  nelda_inc$year,
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)
# dev.off()


# Plotting averages, subsetting to cases where the previous election
# was not more than 6 years ago
par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotavgyr(nelda_le$partylose[nelda_le$timesince <= 4], 
          nelda_le$year[nelda_le$timesince <= 4], 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)
plotavgyr(nelda_ic$partylose[nelda_ic$timesince <= 4],  
          nelda_ic$year[nelda_ic$timesince <= 4], 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)
plotavgyr(nelda_inc$partylose[nelda_inc$timesince <= 4],  
          nelda_inc$year[nelda_inc$timesince <= 4],
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)

# Alternative version where we also include cases where
# There is no party but the incumbency is contested and 
# The incumbent is replaced
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotavgyr(nelda_le$partylose2[nelda_le$timesince <= 6],
          nelda_le$year[nelda_le$timesince <= 6], 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)
plotavgyr(nelda_ic$partylose2[nelda_le$timesince <= 6],  
          nelda_ic$year[nelda_le$timesince <= 6], 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)
# plotavgyr(nelda_inc$partylose2.inc[nelda_le$timesince.inc <= 6],  
#           nelda_le$year[nelda_le$timesince.inc <= 6],
#           ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)


# Alternative version where we look at incumbent replacement 
# and opposition gain
# pdf("increp_opgain.pdf", width=6.5, height=4, family="Times")
par(mfrow=c(2,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotavgyr(nelda_le$increp[nelda_le$timesince <= 6],
          nelda_le$year[nelda_le$timesince <= 6],
          ylim=c(0, .6), ylab="Incumbent Replaced", minyear=1980)
plotavgyr(nelda_ic$increp[nelda_le$timesince <= 6],
          nelda_ic$year[nelda_le$timesince <= 6],
          ylim=c(0, .6), ylab="Incumbent Replaced", minyear=1980)
# plotavgyr(nelda_le$increp.inc[nelda_le$timesince.inc <= 6], 
#           nelda_le$year[nelda_le$timesince.inc <= 6],
#           ylim=c(0, .6), ylab="Incumbent Replaced", minyear=1980)

plotavgyr(nelda_le$oppgain[nelda_le$timesince <= 6], 
          nelda_le$year[nelda_le$timesince <= 6], 
          ylim=c(0, .6), ylab="Opposition Gains", minyear=1980)
plotavgyr(nelda_ic$oppgain[nelda_ic$timesince <= 6], 
          nelda_ic$year[nelda_ic$timesince <= 6],
          ylim=c(0, .6), ylab="Opposition Gains", minyear=1980)
# plotavgyr(nelda_le$oppgain.inc, 
#           nelda_le$year, ylim=c(0, .6), ylab="Opposition Gains", minyear=1980)
# dev.off()

```

```{r dpi cleaning and graphing}
setwd(outdir)

# DPI codes a few variables as 999 or -999 when missing, so making sure
# to set all these to NA so the don't mess up the averages
summary(dpi$percent1)
dpi$percent1[dpi$percent1==-999] = NA
dpi$percent1[dpi$percent1==999] = NA

dyears = 1975:2020 
presvote_yr = tapply(dpi$percent1, dpi$year, mean, na.rm=TRUE)

# Removing the NAs for how long the current party has been in power
summary(dpi$prtyin)
dpi$prtyin[dpi$prtyin==-999] = NA

# Various versions of party in power
# trucating, looking at the median, and taking logs all 
# lessen the recent upward trend
# indicating it's mostly driven by (autocratic) outliers 
prtyin_yr = tapply(dpi$prtyin, dpi$year, mean, na.rm=TRUE)
prtyin_yr_med = tapply(dpi$prtyin, dpi$year, median, na.rm=TRUE)
lnprtyin_yr = tapply(log(dpi$prtyin), dpi$year, mean, na.rm=TRUE)
dpi$prtyin20 <- ifelse(dpi$prtyin > 20, 20, dpi$prtyin)
prtyin_yr_trunc = tapply(dpi$prtyin20,
                         dpi$year, mean, na.rm=TRUE)



# Leg data
table(dpi$allhouse)
dpi$allhouse[dpi$allhouse==-999] = NA

allhouse_yr = tapply(dpi$allhouse, dpi$year, mean, na.rm=TRUE)

# Government seats over total seats
summary(dpi$numgov/dpi$totalseats)

govshare_yr = tapply(dpi$numgov/dpi$totalseats, dpi$year, mean, na.rm=TRUE)


pdf("dpi_winning.pdf", width=6.5, height=3, family='Times')
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
# How well did the current incumbent do in elections?
plot(dyears, presvote_yr, type="l", xlab="Year", ylab="Winner vote/seat share", 
     ylim=c(40, 80), xlim=c(1980, 2020),
     lwd=2, col="red")
lines(dyears, 100*govshare_yr, col="blue", lwd=2)
text(2005, 55, "President", col="red")
text(2005, 70, "Legislature", col="blue")
# lines(dyears,govvoteshare_yr)

# Party Dominance
plot(dyears, prtyin_yr, type="l", ylim=c(0,15), lwd=2, xlim=c(1980, 2020),
     xlab="Year", ylab="Party Tenure")
# lines(dyears, prtyin_yr_med, lwd=2, col="grey")
lines(dyears, prtyin_yr_trunc, lwd=2, lty=3)
text(2010, 13, "Average")
text(2005, 7, "Average (truncated)")
# text(2010, 4, "Median", col="grey")
dev.off()


table(dpi$liec)
dpi$liec[dpi$liec==-999] = NA

liec_yr = tapply(dpi$liec, dpi$year, mean, na.rm=TRUE)

table(dpi$eiec)

dpi$eiec[dpi$eiec==-999] = NA

eiec_yr = tapply(dpi$eiec, dpi$year, mean, na.rm=TRUE)


pdf("dpi_competitive.pdf", width=6.5, height=3, family='Times')
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plot(dyears, allhouse_yr, type="l", lwd=2, xlim=c(1980, 2020),
     xlab="Year", ylab="Exec controlls all")

# Competitiveness Index
plot(dyears, liec_yr, type="l", xlab="Year", ylab="Competitiveness Index",
     ylim=c(3, 7), xlim=c(1980, 2020), lwd=2, col="blue")
lines(dyears, eiec_yr, lwd=2, col="red")
text(2000, 6.5, "Legislature", col="blue")
text(2005, 5.5, "Executive", col="red")

dev.off()


```


```{r exec constraints graphs}


tl_avg = tapply(exec_cons$termlimit, exec_cons$year, mean, na.rm=TRUE)
suc_avg = tapply(exec_cons$succession, exec_cons$year, mean, na.rm=TRUE)
dis_avg = tapply(exec_cons$dismiss, exec_cons$year, mean, na.rm=TRUE)

tl_yrs = as.numeric(names(tl_avg))

tle = read.csv("Term Limit Evasion.csv")

setwd(outdir)
pdf("constraints.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plot(tl_yrs, tl_avg, type="l", lwd=2, ylim=c(.2, 1), xlim=c(1980, 2020),
     xlab="Year", ylab="Proportion with Constraint")
text(2010, .55, "Term Limit")
lines(tl_yrs, suc_avg, type="l", col=rgb(.8,0,0), lwd=2)
text(2010, .98, "Succession",col=rgb(.8,0,0))
lines(tl_yrs, dis_avg, type="l", lwd=2, col=rgb(0, .8, 0))
text(2010, .75, "Dismissal", col=rgb(0, .8, 0))

plot(tle$year, tle$attempt, type="l", ylim=c(0, 6), lwd=2, col="grey",
     xlab="Year", ylab="Term Limit Evasions")
lines(tle$year, tle$succeed, type="l", lwd=2)
text(2011, 5.5, "All Attempts", col="grey")
text(2011, .8, "Success")
dev.off()
```
```{r cpj_graph}
setwd(outdir)
#deleting header rows
cpj_killed = cpj_killed %>% 
  filter(Name != "Name")

cpj_imprison = cpj_imprison %>% 
  filter(Name != "Name")

# extracting years 
# Just a few NAs here from inconsistent entries

cpj_killed_mod = cpj_killed %>% 
  mutate(Year = as.numeric(substr(cpj_killed$Date, nchar(cpj_killed$Date)-3, nchar(cpj_killed$Date))))


cpj_imprison_mod = cpj_imprison %>% 
  mutate(Year = as.numeric(substr(cpj_imprison$Date, nchar(cpj_imprison$Date)-3, nchar(cpj_imprison$Date))))



jailtable = table(cpj_imprison_mod$Year[cpj_imprison_mod$Year >= 1992])
# yrs = 1992:2021

murdertable = table(cpj_killed_mod$Year[cpj_killed_mod$Type.of.Death=="Murder" & cpj_killed_mod$Year < 2022])

cpj_yrs = 1992:2021
  
pdf("journos2.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plot(jailtable, type="l",
     xlab="Year", ylab="Journalists Jailed", col=rgb(0,0,0, alpha=.5),
     axes=FALSE)
axis(1, at=c(1992, 2002, 2012, 2021))
axis(2, at=c(0, 70, 140))
lines(cpj_yrs, predict(loess(jailtable~cpj_yrs)), lwd=2)

plot(murdertable, type="l", xlab="Year",
     ylab="Journalists Murdered", col=rgb(0,0,0, alpha=.5), axes=FALSE)
axis(1, at=c(1992, 2002, 2012, 2021))
axis(2, at=c(0, 25, 50))
lines(cpj_yrs, predict(loess(murdertable~cpj_yrs)), lwd=2)
dev.off()


```


```{r vdem_sub}
# Now we need to merge everything for the objective index
#select relevant variables for merged data
vdem_mod = vdem %>% 
  dplyr::select(1:22, v2x_polyarchy, v2x_freexp_altinf, v2x_frassoc_thick,
                v2x_suffr, v2xel_frefair, v2x_elecoff, v2x_libdem,
                v2x_liberal, e_pop, e_mipopula, e_wb_pop, e_regiongeo, COWcode)

# Filter relevant years
# And dropping ones with no COW code which is needed for merges
# These are small countries mostly not in other datasets
vdem_mod = vdem_mod %>% 
  filter(year >= 1980 & year <= 2021 & !is.na(COWcode))

vdem_mod = vdem_mod %>% 
  dplyr::rename("ccode" = COWcode)
```

```{r fh_mod}
#rename country column in freedom house dataset
fh_mod = fh %>% 
  dplyr::rename("ccode" = cown)

#filter relevant years
fh_mod = fh_mod %>% 
  filter(year >= 1980 & year <= 2021)

# FH is missing 1981 data due to the when reports where released that year
# So we don't lose all of this year in the merge, copy the 1980 data
fh1980 = fh_mod %>%
  filter(year==1980)

fh1980$year=1981
dim(fh_mod)
fh_mod = bind_rows(fh_mod, fh1980)
dim(fh_mod)
```

```{r merge1}
#merge freedom house and vdem by country-name and year 
#only include countries that appear in both datasets 
# backsliding = inner_join(fh_mod, vdem_mod, by = c("country_name", "year"))

backsliding = inner_join(fh_mod, vdem_mod, by = c("ccode", "year"))

# checking for duplicates
dim(backsliding)
y1 = table(backsliding$year)
```

```{r polity}
#filter relevant years, match country variable name, and select relevant variables
# Polity only has the US for 2019-
polity_mod = polity %>% 
  filter(year >= 1980 & year <= 2018) %>% 
  dplyr::rename("country_name" = country) %>% 
  dplyr::select(ccode, year, polity2)
```

```{r merge2}
#merge polity score to the main dataset
backsliding = left_join(backsliding, polity_mod, by = c("ccode", "year"))
# table(backsliding$country_name[backsliding$year == 2015])
dim(backsliding)
y2 = table(backsliding$year)
```

```{r cpj}


#count the number of journalists killed/imprison by country-year 

cpj_killed_cy = cpj_killed_mod %>% 
  group_by(Location, Year) %>% 
  mutate(journalists_killed = n()) %>% 
  ungroup() %>% 
  dplyr::select(Location, Year, journalists_killed) %>% 
  distinct()

cpj_imprison_cy = cpj_imprison_mod %>% 
  group_by(Location, Year) %>% 
  mutate(journalists_imprison = n()) %>% 
  ungroup() %>% 
  dplyr::select(Location, Year, journalists_imprison) %>% 
  distinct()

#rename variables to match 
cpj_killed_cy = cpj_killed_cy %>% 
  dplyr::rename("country_name" = Location,
         "year" = Year) %>% 
  mutate(year = as.numeric(year)) %>% 
  filter(year >= 1980 & year <= 2021)

cpj_imprison_cy = cpj_imprison_cy %>% 
  dplyr::rename("country_name" = Location,
         "year" = Year) %>% 
  mutate(year = as.numeric(year)) %>% 
  filter(year >= 1980 & year <= 2021)
```

```{r merge3}
#merge journalist data to main dataset 
backsliding = left_join(backsliding, cpj_killed_cy, by = c("country_name", "year"))
backsliding = left_join(backsliding, cpj_imprison_cy, by = c("country_name", "year"))
dim(backsliding)
y3 = table(backsliding$year)
```

```{r bmr}
#rename the merging variable and subset years 
bmr_mod = bmr %>% 
  filter(year >= 1980 & year <= 2021)
```

```{r merge4}
#merge boix, miller, rosato data to main dataset 
backsliding = left_join(backsliding, bmr_mod, by = c("ccode", "year"))
dim(backsliding)
y4 = table(backsliding$year)
```


```{r exec_constraints}
#rename the merging variable and subset to relevant years 
exec_cons_mod = exec_cons %>% 
  filter(year >= 1980 & year <= 2021)
```

```{r merge6}
backsliding = left_join(backsliding, exec_cons_mod, by = c("ccode", "year"))
y5 = table(backsliding$year)
dim(backsliding)
```

```{r dpi}
dpi_mod = dpi %>% 
  filter(year >= 1980 & year <= 2021) %>% 
  dplyr::rename("country_name" = countryname)
```

```{r merge7}
backsliding = left_join(backsliding, dpi_mod, by = c("country_name", "year"))
dim(backsliding)
```

```{r merge8}
backsliding = left_join(backsliding, nelda_le, by = c("ccode", "year"))
dim(backsliding)
```


```{r making objective index}
################################################################################
# CAUTIONARY NOTE
# AS DISCUSSED IN THE FINAL VERSION OF THE PAPER
# WE DO NOT RECOMMEND USING THIS AS A DEMOCRACY SCORE ON A COUNTRY-YEAR LEVEL
# RATHER TREAT IT AS A GENERAL WAY TO TRACK AGGREGATE CHANGES OVER TIME
################################################################################
# Function to zero out NAs
zerona = function(x) ifelse(is.na(x), 0, x)

# Transformations first to get everything on a 0-1 scale
backsliding$govshare = backsliding$numgov/backsliding$totalseats
backsliding$pressharerev = 1-backsliding$percent1/100
backsliding$govsharerev = 1-backsliding$govshare
backsliding$prtyinnorm = 1-backsliding$prtyin20/20
backsliding$liec01 = backsliding$liec/7
backsliding$eiec01 = backsliding$eiec/7

# Function to make objective index from whatever columns
# Can allow for weights for later robustness check but default is equal
# Essentially just an average of the non-NA variables
makeOindex = function(dat, weights=NULL){
  dat = as.matrix(dat)
  dat0 = zerona(dat)
  # Stupid version of matrix multiplication
  datw = dat0
  wmat = dat0
  # Default weights are 1
  if (is.null(weights)) weights = rep(1, ncol(dat0))
  for (col in 1:ncol(dat0)){
      datw[,col] = weights[col]*dat0[,col]
      wmat[,col] = weights[col]*(1-is.na(dat[,col]))
  }
  nvalid = apply(wmat, 1, sum)
  varsum = apply(datw, 1, sum)
  return(varsum/nvalid)
}

# Checking there aren't any temporal patterns in NAs
# # Some trend for the vote share ones
# # Probably not a big deal given the number of vars
# table(is.na(backsliding$v2x_suffr), backsliding$year)
# table(is.na(backsliding$v2x_elecoff), backsliding$year)
# table(is.na(backsliding$pressharerev), backsliding$year)
# table(is.na(backsliding$govsharerev), backsliding$year)
# table(is.na(backsliding$prtyinnorm), backsliding$year)
# table(is.na(backsliding$liec01), backsliding$year)
# table(is.na(backsliding$eiec01), backsliding$year)
# table(is.na(backsliding$termlimit.x), backsliding$year)
# table(is.na(backsliding$dismiss), backsliding$year)
# table(is.na(backsliding$succession), backsliding$year)
# table(is.na(backsliding$partylose), backsliding$year)
# table(is.na(backsliding$increp), backsliding$year)
# table(is.na(backsliding$multiparty), backsliding$year)
# table(is.na(backsliding$process), backsliding$year)

# testdat= subset(all, select=c("v2x_suffr", "v2x_elecoff", "pressharerev", 
#                                "govsharerev", "prtyinnorm", "liec01", "eiec01",
#                                "termlimit.x", "dismiss", "succession"))
backsliding$oindex_small = 
  makeOindex(subset(backsliding,select=c("v2x_suffr","pressharerev",
                                        "govsharerev", "prtyinnorm", 
                                        "liec01", "eiec01",
                                        "termlimit.x", "dismiss",
                                        "succession")))
backsliding$oindex =
  makeOindex(subset(backsliding,select=c("v2x_suffr","pressharerev", 
                                              "govsharerev", "prtyinnorm",
                                              "liec01", "eiec01",
                                              "termlimit.x", "dismiss",
                                              "succession",
                                              "partylose",
                                              "multiparty", "process")))

cor(cbind(backsliding$oindex, backsliding$oindex_small, backsliding$v2x_polyarchy, backsliding$fh_total_reversed, backsliding$polity2), use="pairwise.complete.obs")

#Exporting csv at this point
# write.csv(backsliding,"~/Dropbox/Apps/Overleaf/media coverage of backsliding (1)/_data/all.csv")
```



```{r oindex_graph_main}


table(is.na(backsliding$e_pop), backsliding$year)

# Index is missing most data post 2020 so dropping these
backsliding$oindex[backsliding$year > 2020] <- NA

  
# Pulling in 2019 data for 2020-21
for (cont in backsliding$ccode[backsliding$year==2019]){
  if (!is.na(backsliding$e_pop[backsliding$ccode==cont & backsliding$year ==2019])){
    backsliding$e_pop[backsliding$ccode==cont & backsliding$year ==2020] = backsliding$e_pop[backsliding$ccode==cont & backsliding$year ==2019]
    backsliding$e_pop[backsliding$ccode==cont & backsliding$year ==2021] = backsliding$e_pop[backsliding$ccode==cont & backsliding$year ==2019]
  }
}


table(is.na(backsliding$e_pop), backsliding$year)


# Pop-weighted average for polyarchy, oi, fh
ed_wa= sapply(split(backsliding, backsliding$year), 
               function(x) weighted.mean(x$v2x_polyarchy, x$e_pop, na.rm=TRUE))
oi_wa= sapply(split(backsliding, backsliding$year), 
               function(x) weighted.mean(x$oindex, x$e_pop, na.rm=TRUE))
fh_wa= sapply(split(backsliding, backsliding$year), 
               function(x) weighted.mean(x$fh_total_reversed, x$e_pop, na.rm=TRUE))

backsliding_noic = backsliding[!backsliding$country_name %in% c("India", "China"),]

ed_wa_noic= sapply(split(backsliding_noic, backsliding_noic$year), 
               function(x) weighted.mean(x$v2x_polyarchy, x$e_pop, na.rm=TRUE))
oi_wa_noic= sapply(split(backsliding_noic, backsliding_noic$year), 
               function(x) weighted.mean(x$oindex, x$e_pop, na.rm=TRUE))
fh_wa_noic= sapply(split(backsliding_noic, backsliding_noic$year), 
               function(x) weighted.mean(x$fh_total_reversed, x$e_pop, na.rm=TRUE))

# Pop-weighted average/median for FH

setwd(outdir)
pdf("popweight.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plot(1980:2021, tapply(backsliding$oindex, backsliding$year, mean, na.rm=TRUE), type="l", ylim=c(.3, .8), lwd=2,
     xlab="Year", ylab="Unweighted Average of Indices")
lines(1980:2021, tapply(backsliding$v2x_polyarchy, backsliding$year, mean, na.rm=TRUE))
lines(1980:2021, tapply(backsliding$fh_total_reversed/12, backsliding$year, mean, na.rm=TRUE), col="grey")
# abline(h=mean(backsliding$oindex[backsliding$year==2020]), lty=3)
# text(2000, .78, "2020 Average")


plot(yrs, ed_wa, ylim=c(.3, .9), type="l", 
     xlab="Year", ylab="Pop-Weighted Indices")
# text(2010, .75, "Objective")
# text(2010, .55, "Polyarchy", col="grey")
lines(yrs, oi_wa, lwd=2)
lines(yrs, fh_wa/12, col="grey")

plot(yrs, ed_wa_noic, ylim=c(.3, .9), type="l",
     xlab="Year", ylab="Pop-Weighted Indices (No India/China)")
lines(yrs, oi_wa_noic, lwd=2)
lines(yrs, fh_wa_noic/12, col="grey")
# text(2010, .75, "Objective", lwd=2)
# text(2010, .6, "Polyarchy", col="grey")
dev.off()



```


```{r quantile_graph}
setwd(outdir)
yrs = 1980:2021
# Quantiles graph
pdf("quantiles.pdf", width=6.5, height=3.5, family="Times")
par(mfrow=c(1,3))
plot(yrs, tapply(backsliding$v2x_polyarchy, backsliding$year, function(x) quantile(x, .5, na.rm=TRUE)),
     type="l", lwd=2, ylim=c(0,1),
     xlab="Year", ylab="Polyarchy Quantiles")
for (qnt in seq(.1, .9, length.out=9)){
  lines(yrs, tapply(backsliding$v2x_polyarchy, backsliding$year, function(x) quantile(x, qnt, na.rm=TRUE)))
}

plot(yrs, tapply(backsliding$fh_total_reversed/12, backsliding$year, function(x) quantile(x, .5, na.rm=TRUE)),
     type="l", lwd=2, ylim=c(0,1),
     xlab="Year", ylab="Freedom House Quantiles")
for (qnt in seq(.1, .9, length.out=9)){
  lines(yrs, tapply(backsliding$fh_total_reversed/12, backsliding$year, function(x) quantile(x, qnt, na.rm=TRUE)))
}

plot(yrs, tapply((backsliding$polity2 + 10)/20, backsliding$year, function(x) quantile(x, .5, na.rm=TRUE)),
     type="l", lwd=2, ylim=c(0,1),
     xlab="Year", ylab="Polity Quantiles")
for (qnt in seq(.1, .9, length.out=9)){
  lines(yrs, tapply((backsliding$polity2 + 10)/20, backsliding$year, function(x) quantile(x, qnt, na.rm=TRUE)))
}

dev.off()

```



```{r elec_counts}
elec_count = table(nelda$year[nelda$year >= 1980], 
                    nelda$type[nelda$year >= 1980])
nyears = 1980:2020

setwd(outdir)
pdf("eleccount.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plot(nyears, elec_count[,2], type="l", col="grey",
      xlab="Year", ylab="Executive Elections")
lines(lowess(nyears, elec_count[,2]), lwd=2)

plot(nyears, elec_count[,3], type="l", col="grey",
      xlab="Year", ylab="Legislative Elections")
lines(lowess(nyears, elec_count[,3]), lwd=2)
dev.off()

```

```{r randOindex}
setwd(outdir)
pdf("Orand.pdf", width=6.5, height=4, family="Times")
plot(1980:2021, tapply(backsliding$oindex, backsliding$year, mean), type="l", ylim=c(.3, .9), lwd=2, xlim=c(1980,2020),
     xlab="Year", ylab="Objective Democracy Index")
for (i in 1:50){
  randind = makeOindex(subset(backsliding, year <= 2020,
                               select=c("v2x_suffr",
                                                     "pressharerev", 
                                             "govsharerev", "prtyinnorm",
                                             "liec01", "eiec01",
                                             "termlimit.x", "dismiss",
                                             "succession",
                                             "partylose",
                                             "multiparty", "process")),
                                    weights=runif(15))
  
 lines(1980:2020, tapply(randind, backsliding$year[backsliding$year <=2020],
                         mean), col=rgb(0,0,0, alpha=.3)) 
}
dev.off()
```

```{r bydem}

setwd(outdir)

#### By dem status (only data   for 2020 so need to stop there)
byrs = 1980:2020

plotbydem= function(var, ylim=c(0,1), ylab="Average Indicator"){
  plot(byrs, tapply(var[backsliding$democracy==1], 
                   backsliding$year[backsliding$democracy==1], mean,
                   na.rm=TRUE), type="l",
       xlab="Year", ylab=ylab, ylim=ylim, col=rgb(0, .8, 0))
  lines(byrs, tapply(var[backsliding$democracy==0], 
                    backsliding$year[backsliding$democracy==0], mean,
                    na.rm=TRUE), type="l",
        col=rgb(0, 0, .8))
  lines(yrs, tapply(var, 
                    backsliding$year, mean, na.rm=TRUE), type="l",
        lwd=2)
  lines(yrs, tapply(backsliding$democracy, backsliding$year, mean, 
                    na.rm=TRUE), type="l",
        lty=3)
  
}

pdf("bydem.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotbydem(backsliding$partylose, ylim=c(0, .6),
          ylab="Turnover")
text(2010, .52, "Dem", col=rgb(0, .8, 0))
text(2010, .13, "Aut", col=rgb(0, 0, .8))
text(2010, .37, "All")

plotbydem(backsliding$oindex, ylim=c(.2, .9),
          ylab="Objective Index")
text(2010, .86, "Dem", col=rgb(0, .8, 0))
text(2010, .6, "Aut", col=rgb(0, 0, .8))
text(2010, .71, "All")
dev.off()


pdf("bydem_others.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotbydem(backsliding$v2x_polyarchy, ylim=c(.1, .8),
          ylab="Polyarchy")
text(2010, .76, "Dem", col=rgb(0, .8, 0))
text(2010, .23, "Aut", col=rgb(0, 0, .8))
text(2010, .48, "All")

plotbydem(backsliding$fh_total_reversed/12, ylim=c(.1, .9),
          ylab="Freedom House")
text(2010, .76, "Dem", col=rgb(0, .8, 0))
text(2010, .23, "Aut", col=rgb(0, 0, .8))
text(2010, .48, "All")
dev.off()

```

```{r change_data_yearly}

# Changes
countries = unique(cbind(backsliding$ccode, backsliding$country_name))

clevel = data.frame(ccode=countries[,1], country=countries[,2])
clevel$odem1980 = NA
clevel$odem1990 = NA
clevel$odem2000 = NA
clevel$odem2010 = NA
clevel$odem2020 = NA
clevel$poly1980 = NA
clevel$poly1990 = NA
clevel$poly2000 = NA
clevel$poly2010 = NA
clevel$poly2020 = NA

for (i in 1:nrow(clevel)){
  if (length(backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1980"] > 0)){
    clevel$odem1980[i] = backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1980"]
  }
  if (length(backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1990"] > 0)){
    clevel$odem1990[i] = backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1990"]
  }
  if (length(backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2000"] > 0)){
    clevel$odem2000[i] = backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2000"]
  }
  if (length(backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2010"] > 0)){
    clevel$odem2010[i] = backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2010"]
  }
  if (length(backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2020"] > 0)){
    clevel$odem2020[i] = backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2020"]
  }
  # Now vdem
  if (length(backsliding$oindex[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1980"] > 0)){
    clevel$poly1980[i] = backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1980"]
  }
  if (length(backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1990"] > 0)){
    clevel$poly1990[i] = backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="1990"]
  }
  if (length(backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2000"] > 0)){
    clevel$poly2000[i] = backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2000"]
  }
  if (length(backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2010"] > 0)){
    clevel$poly2010[i] = backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2010"]
  }
  if (length(backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2020"] > 0)){
    clevel$poly2020[i] = backsliding$v2x_polyarchy[backsliding$ccode==clevel$ccode[i] & backsliding$year=="2020"]
  }
}

head(clevel)


clevel$ochange8090 = clevel$odem1990-clevel$odem1980
clevel$ochange9000 = clevel$odem2000-clevel$odem1990
clevel$ochange0010 = clevel$odem2010-clevel$odem2000
clevel$ochange1020 = clevel$odem2020-clevel$odem2010
clevel$pchange8090 = clevel$poly1990-clevel$poly1980
clevel$pchange9000 = clevel$poly2000-clevel$poly1990
clevel$pchange0010 = clevel$poly2010-clevel$poly2000
clevel$pchange1020 = clevel$poly2020-clevel$poly2010

```

```{r decades}
backsliding$decade = 1 + 10*floor((backsliding$year+1)/10)
oi_dec = tapply(backsliding$oindex, paste(backsliding$country, backsliding$decade), mean, na.rm=TRUE)
poly_dec = tapply(backsliding$v2x_polyarchy, paste(backsliding$country, backsliding$decade), mean, na.rm=TRUE) 
fh_dec = tapply(backsliding$fh_total_reversed, paste(backsliding$country, backsliding$decade), mean, na.rm=TRUE)
# Checking we can stack 
dim(oi_dec)
dim(poly_dec)
dim(fh_dec)

sum(names(oi_dec)==names(poly_dec))
sum(names(oi_dec)==names(fh_dec))

cdec = names(oi_dec)

country = substr(cdec, 1, nchar(cdec)-5)
decade = as.numeric(substr(cdec, nchar(cdec)-4, nchar(cdec)))

decdata = data.frame(cname=country, decade=decade, oi=oi_dec, poly=poly_dec, fh=fh_dec)

# Lags
decdata <- decdata %>%
  group_by(cname) %>%
  mutate(poly_l = lag(poly), oi_l = lag(oi), fh_l = lag(fh))

# Changes
decdata$poly_d <- decdata$poly - decdata$poly_l
decdata$oi_d <- decdata$oi - decdata$oi_l
decdata$fh_d <- decdata$fh - decdata$fh_l

par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
hist(decdata$poly_d[decdata$decade==1991])
hist(decdata$poly_d[decdata$decade==2001])
hist(decdata$poly_d[decdata$decade==2011])


par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
hist(decdata$oi_d[decdata$decade==1991])
hist(decdata$oi_d[decdata$decade==2001])
hist(decdata$oi_d[decdata$decade==2011])
```
```{r half_decades}
backsliding$hdecade = 1 + 5*floor((backsliding$year+1)/5)
oi_hdec = tapply(backsliding$oindex, paste(backsliding$country, backsliding$hdecade), mean, na.rm=TRUE)
poly_hdec = tapply(backsliding$v2x_polyarchy, paste(backsliding$country, backsliding$hdecade), mean, na.rm=TRUE) 
fh_hdec = tapply(backsliding$fh_total_reversed, paste(backsliding$country, backsliding$hdecade), mean, na.rm=TRUE)
# Checking we can stack 
dim(oi_hdec)
dim(poly_hdec)
dim(fh_hdec)

sum(names(oi_hdec)==names(poly_hdec))
sum(names(oi_hdec)==names(fh_hdec))

cdec = names(oi_hdec)

country = substr(cdec, 1, nchar(cdec)-5)
hdecade = as.numeric(substr(cdec, nchar(cdec)-4, nchar(cdec)))

hdecdata = data.frame(cname=country, hdecade=hdecade, 
                      oi=oi_hdec, poly=poly_hdec, fh=fh_hdec)

# Lags
hdecdata <- hdecdata %>%
  group_by(cname) %>%
  mutate(poly_l = lag(poly), oi_l = lag(oi), fh_l = lag(fh))

# Changes
hdecdata$poly_d <- hdecdata$poly - hdecdata$poly_l
hdecdata$oi_d <- hdecdata$oi - hdecdata$oi_l
hdecdata$fh_d <- hdecdata$fh - hdecdata$fh_l

# Dropping 1981 (no change) and 2021 (only 1-2 yrs data)

hdecdata <- subset(hdecdata, hdecade < 2021 & hdecade > 1981)

hdecs <- as.numeric(names(tapply(hdecdata$oi_d, hdecdata$hdecade, mean, na.rm=TRUE)))

setwd(outdir)

pdf("hdecchange.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,3), mar=c(3,3,2,1), mgp=c(2, .7, 0))
plot(hdecs, tapply(hdecdata$oi_d, hdecdata$hdecade, mean, na.rm=TRUE), type="l", lwd=2,
     ylim=c(-.05, .15),
     xlab="Half Decade Start",
     ylab="Mean and SD of Change", main="Objective Index")
lines(hdecs, tapply(hdecdata$oi_d, hdecdata$hdecade, sd, na.rm=TRUE), lwd=2, col="grey")
text(1995, .14, "SD", col="grey")
text(1995, 0, "Mean")
plot(hdecs, tapply(hdecdata$poly_d, hdecdata$hdecade, mean, na.rm=TRUE), type="l", lwd=2,
     ylim=c(-.05, .15),
     xlab="Half Decade Start",
     ylab="Mean and SD of Change",
     main="Polyarchy")
lines(hdecs, tapply(hdecdata$poly_d, hdecdata$hdecade, sd, na.rm=TRUE), lwd=2, col="grey")
text(1995, .13, "SD", col="grey")
text(1995, 0, "Mean")
plot(hdecs, tapply(hdecdata$fh_d/12, hdecdata$hdecade, mean, na.rm=TRUE), type="l", lwd=2,
     ylim=c(-.05, .2),
     xlab="Half Decade Start",
     ylab="Mean and SD of Change",
     main="Freedom House")
lines(hdecs, tapply(hdecdata$fh_d/12, hdecdata$hdecade, sd, na.rm=TRUE), lwd=2, col="grey")
text(1995, .13, "SD", col="grey")
text(1995, 0, "Mean")
dev.off()
# 
# 
# plot(tapply(hdecdata$oi_d, hdecdata$hdecade, sd, na.rm=TRUE))
# lines(tapply(hdecdata$poly_d, hdecdata$hdecade, sd, na.rm=TRUE))
# lines(tapply(hdecdata$fh_d/12, hdecdata$hdecade, sd, na.rm=TRUE))
# 
# par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
# hist(hdecdata$oi_d[hdecdata$hdecade==1991])
# hist(hdecdata$oi_d[hdecdata$hdecade==2001])
# hist(hdecdata$oi_d[hdecdata$hdecade==2011])
# 
# par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
# hist(decdata$poly_d[decdata$decade==1991])
# hist(decdata$poly_d[decdata$decade==2001])
# hist(decdata$poly_d[decdata$decade==2011])
# 


```
```{r hdec_hist}

setwd(outdir)
bl = .05
bseq = seq(-1 - bl/2, 1, by=bl)

ymax = 110

pdf("changehist.pdf", width=6.5, height=3.5, family="Times")
par(mfrow=c(3,7), mar=c(2,1,1,.5), mgp=c(2, .7, 0))
for (hd in c(1986, 1991, 1996, 2001, 2006, 2011, 2016)){
hist(hdecdata$oi_d[hdecdata$hdecade==hd], main=paste(hd-5,"-",hd+4), ylab="",xlab="", xlim=c(-.4, .4),
     ylim=c(0, ymax),
     breaks=bseq, axes=FALSE)
  axis(1, at=c(-.4, 0, .4))
  abline(v=0, lty=2)
  if(hd==1986) mtext("Objective Index", side=2, cex=.7)
}

for (hd in c(1986, 1991, 1996, 2001, 2006, 2011, 2016)){
hist(hdecdata$poly_d[hdecdata$hdecade==hd], main="", ylab="", xlab="", xlim=c(-.4, .4),
     ylim=c(0, ymax),
     breaks=bseq, axes=FALSE)
  axis(1, at=c(-.4, 0, .4))
  abline(v=0, lty=2)
  if(hd==1986) mtext("Polyarchy", side=2, cex=.7)
}

for (hd in c(1986, 1991, 1996, 2001, 2006, 2011, 2016)){
hist(hdecdata$fh_d[hdecdata$hdecade==hd]/12, main="", ylab="", xlab="", xlim=c(-.4, .4),
     ylim=c(0, ymax),
     breaks=bseq, axes=FALSE)
  axis(1, at=c(-.4, 0, .4))
  abline(v=0, lty=2)
  if(hd==1986) mtext("Freedom House", side=2, cex=.7)
}
dev.off()

```
```{r change_graph}
setwd(outdir)

pdf("change.pdf", width=6.5, height=4.5, family="Times")
par(mfrow=c(2,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plot(clevel$odem1980, clevel$ochange8090, ylim=c(-1,1),
     xlab="Objective Index 1980", ylab="Change O Index, 1980-1990")
abline(h=mean(clevel$ochange8090, na.rm=TRUE))
abline(h=0, lty=3)
plot(clevel$odem1990, clevel$ochange9000, ylim=c(-1,1),
     xlab="Objective Index 1990", ylab="Change O Index, 1990-2000")
abline(h=mean(clevel$ochange9000, na.rm=TRUE))
abline(h=0, lty=3)
plot(clevel$odem2000, clevel$ochange0010, ylim=c(-1,1),
     xlab="Objective Index 2000", ylab="Change O Index, 2000-2010")
abline(h=mean(clevel$ochange0010, na.rm=TRUE))
abline(h=0, lty=3)
plot(clevel$odem2010, clevel$ochange1020, ylim=c(-1,1),
     xlab="Objective Index 2010", ylab="Change O Index, 2010-2020")
abline(h=mean(clevel$ochange1020, na.rm=TRUE))
abline(h=0, lty=3)
dev.off()

```

```{r change_dist}

setwd(outdir)

plotchange = function(var, main="", xlim=c(-.5, .5), 
                       ylim=c(0, 6.5),
                       xlab="Distribution of Change"){
  hist(var[var >=xlim[1] & var <= xlim[2]], main=main, axes=FALSE, xlim=xlim, freq=FALSE, xlab=xlab, ylim=ylim)
  lines(density(var[var >=xlim[1] & var <= xlim[2]], na.rm=TRUE))
  axis(1, at=c(xlim[1],0,xlim[2]))
  axis(2)
  box()
  abline(v=c(0, mean(var, na.rm=TRUE)), lty=c(3,1))
}

pdf("odemchange.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(2,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotchange(clevel$ochange8090, xlab="Dist'n of O Index Change, 1980-1990")
plotchange(clevel$ochange9000, xlab="Dist'n of O Index Change, 1990-2000")
plotchange(clevel$ochange0010, xlab="Dist'n of O Index Change, 2000-2010")
plotchange(clevel$ochange1020, xlab="Dist'n of O Index Change, 2010-2020")
dev.off()

pdf("vdemchange.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(2,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotchange(clevel$pchange8090, xlab="Dist'n of Polyarchy Change, 1980-1990", 
           ylim=c(0, 10))
plotchange(clevel$pchange9000, xlab="Dist'n of Polyarchy Change, 1990-2000", 
           ylim=c(0, 10))
plotchange(clevel$pchange0010, xlab="Dist'n of Polyarchy Change, 2000-2010", 
           ylim=c(0, 10))
plotchange(clevel$pchange1020, xlab="Dist'n of Polyarchy Change, 2010-2020", 
           ylim=c(0, 10))
dev.off()


```


```{r region}
# Kamya's guess at mapping from region to V-dem reports

# table(backsliding$e_regiongeo)


backsliding$region = recode(backsliding$e_regiongeo, 
  "1" = "WENA", 
  "2" = "WENA", 
  "3" = "WENA",
  "4" = "EECA",
  "5" = "MENA",
  "6" = "SSA",
  "7" = "SSA",
  "8" = "SSA",
  "9" = "SSA",
  "10" = "MENA",
  "11" = "EECA",
  "12" = "AP",
  "13" = "AP",
  "14" = "AP",
  "15" = "AP",
  "16" = "WENA",
  "17" = "LAC",
  "18" = "LAC",
  "19" = "LAC",
  .default="NA")
# table(backsliding$region)

backslidingAP = subset(backsliding, region=="AP")
backslidingEECA = subset(backsliding, region=="EECA")
backslidingLAC = subset(backsliding, region=="LAC")
backslidingMENA = subset(backsliding, region=="MENA")
backslidingSSA = subset(backsliding, region=="SSA")
backslidingWENA = subset(backsliding, region=="WENA")

ecount = table(backsliding$country_name[backsliding$region=="WENA"])

stable = names(ecount)[ecount ==42]

polyAP = tapply(backslidingAP$v2x_polyarchy,
                backslidingAP$year,
                mean, na.rm=TRUE)
polyEECA = tapply(backslidingEECA$v2x_polyarchy,
                backslidingEECA$year,
                mean, na.rm=TRUE)
polyLAC = tapply(backslidingLAC$v2x_polyarchy,
                backslidingLAC$year,
                mean, na.rm=TRUE)
polyMENA = tapply(backslidingMENA$v2x_polyarchy,
                backslidingMENA$year,
                mean, na.rm=TRUE)
polySSA = tapply(backslidingSSA$v2x_polyarchy,
                backslidingSSA$year,
                mean, na.rm=TRUE)
polyWENA = tapply(backslidingWENA$v2x_polyarchy,
                backslidingWENA$year,
                mean, na.rm=TRUE)

polyWENAstable = tapply(backslidingWENA$v2x_polyarchy[backsliding$country_name %in% stable],
                backslidingWENA$year[backsliding$country_name %in% stable],
                mean, na.rm=TRUE)

# plot(polyWENAstable, type="l")

oiAP = tapply(backslidingAP$oindex,
                backslidingAP$year,
                mean, na.rm=TRUE)
oiEECA = tapply(backslidingEECA$oindex,
                backslidingEECA$year,
                mean, na.rm=TRUE)
oiLAC = tapply(backslidingLAC$oindex,
                backslidingLAC$year,
                mean, na.rm=TRUE)
oiMENA = tapply(backslidingMENA$oindex,
                backslidingMENA$year,
                mean, na.rm=TRUE)
oiSSA = tapply(backslidingSSA$oindex,
                backslidingSSA$year,
                mean, na.rm=TRUE)
oiWENA = tapply(backslidingWENA$oindex,
                backslidingWENA$year,
                mean, na.rm=TRUE)

fhAP = tapply(backslidingAP$fh_total_reversed/12,
                backslidingAP$year,
                mean, na.rm=TRUE)
fhEECA = tapply(backslidingEECA$fh_total_reversed/12,
                backslidingEECA$year,
                mean, na.rm=TRUE)
fhLAC = tapply(backslidingLAC$fh_total_reversed/12,
                backslidingLAC$year,
                mean, na.rm=TRUE)
fhMENA = tapply(backslidingMENA$fh_total_reversed/12,
                backslidingMENA$year,
                mean, na.rm=TRUE)
fhSSA = tapply(backslidingSSA$fh_total_reversed/12,
                backslidingSSA$year,
                mean, na.rm=TRUE)
fhWENA = tapply(backslidingWENA$fh_total_reversed/12,
                backslidingWENA$year,
                mean, na.rm=TRUE)

yrs <- as.numeric(names(polyAP))

setwd(outdir)

pdf("regions.pdf", width=6.5, height=3)
par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plot(yrs, oiAP, type="l", ylim=c(.4, 1), lwd=2,
     xlab="Year", ylab="Objective Index")
lines(yrs, oiEECA, col="grey", lwd=2)
lines(yrs, oiLAC, col="red", lwd=2)
lines(yrs, oiMENA, col="dark green", lwd=2)
lines(yrs, oiSSA, col="blue", lwd=2)
lines(yrs, oiWENA, col="orange", lwd=2)
legend(1980, 1.03, legend = c("W Europe/N America",
                           "Lat Am./Caribbean", 
                           "Asia/Pacific",
                           "E Europe/C Asia", 
                           "Sub-Saharan Africa",
                           "Mid East/N Africa"
                           ),
       col = c("orange", "red", "black","grey","blue", "dark green"),
       lwd = 2, 
      cex = 0.8, box.col=rgb(1,1,1, alpha=0))

plot(yrs, polyAP, type="l", ylim=c(.1, 1), lwd=2,
     xlab="Year", ylab="Polyarchy")
lines(yrs, polyEECA, col="grey", lwd=2)
lines(yrs, polyLAC, col="red", lwd=2)
lines(yrs, polyMENA, col="dark green", lwd=2)
lines(yrs, polySSA, col="blue", lwd=2)
lines(yrs, polyWENA, col="orange", lwd=2)

plot(yrs, fhAP, type="l", ylim=c(.1, 1), lwd=2,
     xlab="Year", ylab="Freedom House")
lines(yrs, fhEECA, col="grey", lwd=2)
lines(yrs, fhLAC, col="red", lwd=2)
lines(yrs, fhMENA, col="dark green", lwd=2)
lines(yrs, fhSSA, col="blue", lwd=2)
lines(yrs, fhWENA, col="orange", lwd=2)



# Adding the legend to the left of all three panels

# Reset the xpd parameter to its default value
# par(xpd = FALSE)

```

```{r balanced}
# Checking how often each country code shows up in the final data
maxyrs = max(table(backsliding$ccode))
fullpanelcc = names(table(backsliding$ccode))[table(backsliding$ccode)==maxyrs]
fullpanelcc = as.numeric(fullpanelcc)
table(backsliding$ccode %in% fullpanelcc)

# Balanced panel of NELDA
nelda_le_fp = subset(nelda_le, nelda_le$ccode %in% fullpanelcc)




# Balanced panel of full data
fullpanel = subset(backsliding, ccode %in% fullpanelcc)
dim(fullpanel)


setwd(outdir)
pdf("balancedpanel.pdf", width=6.5, height=3, family="Times")
par(mfrow=c(1,2), mar=c(3,3,1,1), mgp=c(2, .7, 0))

plotavgyr(nelda_le_fp$partylose[nelda_le_fp$timesince <= 6], 
          nelda_le_fp$year[nelda_le_fp$timesince <= 6], 
          ylim=c(0, .5), ylab="Incumbent Party Loses", minyear=1980)

# Redoing main index graphs of full panel


plot(1980:2021, tapply(fullpanel$oindex, fullpanel$year, mean, na.rm=TRUE), type="l", ylim=c(.3, .8), lwd=2,
     xlab="Year", ylab="Unweighted Average of Indices")
lines(1980:2021, tapply(fullpanel$v2x_polyarchy, fullpanel$year, mean, na.rm=TRUE))
lines(1980:2021, tapply(fullpanel$fh_total_reversed/12, fullpanel$year, mean, na.rm=TRUE), col="grey")
dev.off()

```

```{r export}
# For those who don't want to run all of this
write.csv(backsliding,"~/Dropbox/Apps/Overleaf/media coverage of backsliding (1)/_data/all.csv")

```
